package com.elookinto.amz;


//http://www.amazon.com/s/?rh=n%3A3760901%2Ck%3Aanti+aging&sort=salesrank

import com.gargoylesoftware.htmlunit.ProxyConfig;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Set;

import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small HtmlUnit-based scraping experiments.
 *
 * <p>{@link #loginPage()} dumps the XML of a Google-Translate-proxied Baidu
 * Baike page to standard output; {@link #homePage()} walks a fixed number of
 * forum listing pages and counts the distinct nicknames found on them.
 */
public class Baike {

    /** Page fetched by {@link #loginPage()}: baike.baidu.com/view/1.htm via Google Translate. */
    private static final String TRANSLATED_BAIKE_URL =
            "http://translate.google.com/translate?sl=auto&tl=en&js=n&prev=_t&hl=en&ie=UTF-8&eotf=1&u=http%3A%2F%2Fbaike.baidu.com%2Fview%2F1.htm";

    /** Prefix of the forum listing URL; the page index is appended to it. */
    private static final String FORUM_PAGE_URL = "http://bbs.wenxuecity.com/znjy/?page=";

    /** Number of forum listing pages visited by {@link #homePage()} (indices 0..9). */
    private static final int FORUM_PAGE_COUNT = 10;

    /** Pause after each forum page fetch, in milliseconds. */
    private static final long FETCH_DELAY_MILLIS = 5000;

    /** Value of the {@code class} attribute that marks a nickname anchor. */
    private static final String NICKNAME_CLASS = "nickname";

    /**
     * Shared client used by {@link #loginPage()}; JavaScript is switched off
     * in the static initializer below.
     * NOTE(review): this client is never closed - confirm which shutdown call
     * the project's HtmlUnit version offers before adding one.
     */
    final static WebClient webClient = new WebClient();

    static {
        // To go through a proxy, build a ProxyConfig (e.g. host
        // "proxy.entp.attws.com", port 8080) and hand it to
        // webClient.getOptions().setProxyConfig(...). Disabled by default.
        webClient.getOptions().setJavaScriptEnabled(false);
    }

    // URLs left here for reference:
    //   http://groups.wenxuecity.com/groupview.php?gid=1562#discussion
    //   http://groups.wenxuecity.com/groupview.php?gid=1502
    //   http://groups.wenxuecity.com/groupview.php?gid=1173

    /**
     * Entry point; runs {@link #loginPage()}.
     *
     * @param args command-line arguments (ignored)
     * @throws Exception if {@link #loginPage()} fails
     */
    public static void main(String[] args) throws Exception {
        loginPage();
    }

    /**
     * Fetches the translated Baike page with the shared client and prints its
     * XML serialization to standard output, encoded as UTF-8.
     *
     * <p>Despite the name, no login is performed here.
     *
     * @throws Exception if the page cannot be fetched or the output stream
     *                   cannot be created
     */
    public static void loginPage() throws Exception {
        // Wrap System.out so non-ASCII page content is emitted as UTF-8
        // regardless of the platform default; auto-flush is enabled.
        PrintStream sysout = new PrintStream(System.out, true, "UTF-8");

        HtmlPage page = webClient.getPage(TRANSLATED_BAIKE_URL);

        // Serialize once and print the result.
        sysout.println(page.asXml());
    }

    /**
     * Visits forum listing pages 0..9, collects the text of every
     * {@code <a class="nickname">} element into a set, and prints the running
     * number of distinct nicknames after each page.
     *
     * <p>This method uses its own freshly constructed client, so the
     * JavaScript-disabled setting of the shared {@link #webClient} is not
     * applied here.
     *
     * @throws Exception if a page cannot be fetched, the output stream cannot
     *                   be created, or the thread is interrupted while pausing
     */
    public static void homePage() throws Exception {
        PrintStream sysout = new PrintStream(System.out, true, "UTF-8");
        // Named differently from the static field to avoid shadowing it.
        // NOTE(review): like the shared client, this one is never closed.
        final WebClient client = new WebClient();
        Set<String> uids = new HashSet<String>();

        for (int pageIndex = 0; pageIndex < FORUM_PAGE_COUNT; pageIndex++) {
            final HtmlPage page = client.getPage(FORUM_PAGE_URL + pageIndex);

            // Pause after every fetch so requests are spaced out.
            Thread.sleep(FETCH_DELAY_MILLIS);

            // getAttribute/asText are available on DomElement, so no cast
            // to HtmlAnchor is required.
            DomNodeList<DomElement> anchors = page.getElementsByTagName("a");
            for (DomElement anchor : anchors) {
                if (NICKNAME_CLASS.equals(anchor.getAttribute("class"))) {
                    uids.add(anchor.asText());
                }
            }
            // Running total of distinct nicknames seen so far.
            sysout.println(uids.size());
        }
    }
}
